iT邦幫忙

2025 iThome 鐵人賽

DAY 12
0
生成式 AI

練習AI系列 第 13

AI 翻譯器(多語、術語表、品質檢查)

  • 分享至 

  • xImage
  •  

🆕 新增/修改的程式碼

  1. src/utils/lang.js(新增)
    // src/utils/lang.js
    /**
     * Supported language codes mapped to each language's display name.
     * Keys are the codes the helpers in this module accept; values are the
     * names of the languages written in the language itself.
     */
    export const LANG_MAP = Object.fromEntries([
      ["zh-TW", "繁體中文"],
      ["zh-CN", "簡體中文"],
      ["en", "English"],
      ["ja", "日本語"],
      ["ko", "한국어"],
      ["es", "Español"],
      ["fr", "Français"],
      ["de", "Deutsch"],
    ]);

/**
 * Resolve a user-supplied language code to one of the keys of LANG_MAP.
 *
 * Matching ignores surrounding whitespace and letter case and treats "_" the
 * same as "-", so "EN", "zh-tw" and "zh_CN" all resolve. A few extra aliases
 * are accepted: "zh" and "zh-Hant" map to "zh-TW"; "zh-Hans" maps to "zh-CN".
 *
 * @param {string} code - Language code to normalize.
 * @returns {string} The canonical language code, or "" when the input is
 *   missing, not a string, or not recognized.
 */
export function normalizeLang(code) {
  if (typeof code !== "string") return "";
  const trimmed = code.trim();
  if (trimmed === "") return "";

  const supported = Object.keys(LANG_MAP);
  // Compare against own keys only, so inherited names such as "constructor"
  // are never mistaken for a supported language.
  if (supported.includes(trimmed)) return trimmed;

  // Loose aliases.
  const folded = trimmed.toLowerCase().replace(/_/g, "-");
  if (folded === "zh" || folded === "zh-hant") return "zh-TW";
  if (folded === "zh-hans") return "zh-CN";

  // Case-insensitive match against the supported codes ("zh-tw" -> "zh-TW").
  return supported.find((key) => key.toLowerCase() === folded) ?? "";
}

/**
 * Guard that a language code is one of the keys of LANG_MAP.
 *
 * @param {string} target - Language code to check (expected to be normalized already).
 * @returns {void}
 * @throws {Error} When `target` is not a key of LANG_MAP; the message lists
 *   every supported code.
 */
export function assertSupported(target) {
  const supported = Object.keys(LANG_MAP);
  // Own-key membership, so inherited names such as "constructor" are rejected.
  if (supported.includes(target)) return;
  throw new Error(`不支援的語言代碼:${target},請使用其中之一:${supported.join(", ")}`);
}

  2. src/day12_translator.js(新增)
    // src/day12_translator.js
    import { openai } from "./aiClient.js";
    import { LANG_MAP, normalizeLang, assertSupported } from "./utils/lang.js";

/**
 * Turn a glossary into the prompt text that tells the model to use the given
 * term pairs.
 *
 * Example glossary:
 *   [{ source: "Retrieval-Augmented Generation", target: "檢索增強生成" },
 *    { source: "embedding", target: "嵌入向量" }]
 *
 * @param {Array<{source:string,target:string}>} [glossary=[]] - Term pairs.
 *   Entries missing either `source` or `target` are skipped.
 * @returns {string} A multi-line hint (header, one "- source => target" line
 *   per usable entry, footer), or "" when there is nothing usable.
 */
function buildGlossaryHint(glossary = []) {
  if (!Array.isArray(glossary) || glossary.length === 0) return "";

  const lines = glossary
    .filter((entry) => entry?.source && entry?.target)
    .map((entry) => `- ${entry.source} => ${entry.target}`);
  if (lines.length === 0) return "";

  return [
    "嚴格遵守下列術語對照,不可擅自改動:",
    lines.join("\n"),
    "若原文已是目標語言但包含上述術語,仍需以對照表標準化。",
  ].join("\n");
}

/**
 * Prompt text asking the model to leave code and symbols untouched, translate
 * only the narrative text, and not translate link URLs.
 *
 * NOTE(review): the parenthetical in the first prompt line looks truncated
 * (possibly code-fence/backtick markers were lost) — confirm the intended wording.
 *
 * @param {boolean} [preserveFormat=true] - When falsy, no hint is produced.
 * @returns {string} The two-line hint, or "" when `preserveFormat` is falsy.
 */
function buildFormatProtectionHint(preserveFormat = true) {
  const instructions = [
    "如原文含 Markdown/程式碼區塊(inline),",
    "請原樣保留程式碼與符號,僅翻譯敘述文字;連結的 URL 不要翻譯。",
  ];
  return preserveFormat ? instructions.join("\n") : "";
}

/**
 * Translate a single piece of text with one chat-completions request.
 *
 * @param {Object} opts
 * @param {string} opts.text - Text to translate (required, non-blank).
 * @param {string} [opts.sourceLang] - Source language code; when omitted the
 *   prompt asks the model to detect it.
 * @param {string} [opts.targetLang="zh-TW"] - Target language code (zh-TW / en / ja ...).
 * @param {Array<{source:string,target:string}>} [opts.glossary] - Term pairs to enforce.
 * @param {boolean} [opts.preserveFormat=true] - Ask the model to keep Markdown/code intact.
 * @param {("formal"|"neutral"|"casual")} [opts.tone="neutral"] - Tone, inserted into the prompt as-is.
 * @returns {Promise<{ source:string, target:string, detectedSourceLang?:string }>}
 *   `target` is the trimmed model reply ("" when the reply has no content).
 *   `detectedSourceLang` is the placeholder "(model-detected)" when no
 *   `sourceLang` was given, otherwise undefined; the language the model
 *   detected is not reported back.
 * @throws {Error} When `text` is not a non-blank string, or when the target
 *   language is rejected by `assertSupported`.
 */
export async function translateOne(opts = {}) {
  const {
    text = "",
    sourceLang = "",
    targetLang = "zh-TW",
    glossary = [],
    preserveFormat = true,
    tone = "neutral",
  } = opts;

  if (typeof text !== "string" || !text.trim()) throw new Error("text 為必填。");
  // Keep the raw value when normalization fails so assertSupported sees what the caller passed.
  const tgt = normalizeLang(targetLang) || targetLang;
  assertSupported(tgt);

  // Empty hints are dropped by filter(Boolean) so the prompt has no blank lines.
  const sys = [
    `你是專業的翻譯員,將輸入文字翻譯為 ${LANG_MAP[tgt]}。`,
    `語氣:${tone};除非為必要語法調整,請忠實於原文。`,
    buildFormatProtectionHint(preserveFormat),
    buildGlossaryHint(glossary),
    "若輸入已是目標語言,請僅進行用詞標準化與小幅潤飾,不可改變技術含義。",
  ].filter(Boolean).join("\n");

  const user = [
    sourceLang
      ? `來源語言:${LANG_MAP[normalizeLang(sourceLang)] || sourceLang}`
      : "來源語言:自動判斷",
    "請只回覆純文字譯文,不要加註任何說明。",
    "==== 原文開始 ====",
    text,
    "==== 原文結束 ====",
  ].join("\n");

  const res = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    temperature: 0.2,
    messages: [
      { role: "system", content: sys },
      { role: "user", content: user },
    ],
  });

  const target = res.choices?.[0]?.message?.content?.trim() || "";
  return { source: text, target, detectedSourceLang: sourceLang ? undefined : "(model-detected)" };
}

/**
 * Translate several strings one after another with the same settings.
 *
 * @param {Object} opts
 * @param {string[]} opts.items - Strings to translate (required, non-empty array).
 * @param {string} [opts.targetLang]
 * @param {string} [opts.sourceLang]
 * @param {Array} [opts.glossary]
 * @param {boolean} [opts.preserveFormat]
 * @param {string} [opts.tone]
 * @returns {Promise<Array<{source:string,target:string,detectedSourceLang?:string}>>}
 *   One result per item, in input order.
 * @throws {Error} When `items` is not a non-empty array. A failure of any
 *   single translation rejects the whole batch.
 */
export async function translateBatch(opts = {}) {
  const { items = [], ...rest } = opts;
  if (!Array.isArray(items) || items.length === 0) throw new Error("items 為必填陣列。");

  const results = [];
  for (const item of items) {
    // Deliberately sequential; for large batches, bound the concurrency (e.g. with p-limit).
    // `text` is spread last so a stray `opts.text` cannot replace every item.
    results.push(await translateOne({ ...rest, text: item }));
  }
  return results;
}

/**
 * Quality estimation (QE) for a translation: asks the model to score the
 * target text against the source and to suggest improvements.
 *
 * @param {string} source - Original text (required, non-blank).
 * @param {string} target - Translated text (required, non-blank).
 * @param {Array} [glossary=[]] - Optional term pairs added to the prompt.
 * @returns {Promise<{scores:{adequacy:number,fluency:number,terminology:number}, suggestions:string[]}>}
 *   The JSON object parsed from the model reply. The prompt requests the shape
 *   above with scores in 0~1, but the reply is returned as parsed without
 *   validation; an empty reply yields `{}`.
 * @throws {Error} When `source` or `target` is blank.
 * @throws {SyntaxError} When the model reply is not valid JSON.
 */
export async function qualityCheck(source, target, glossary = []) {
  if (!source?.trim() || !target?.trim()) throw new Error("source 與 target 皆不可為空。");

  const sys = [
    "你是專業的譯後品質評估員,請針對給定的原文與譯文評分與提出具體建議。",
    "請以純 JSON 回覆,格式:",
    '{"scores":{"adequacy":0.0,"fluency":0.0,"terminology":0.0},"suggestions":["..."]}',
    "分數 0~1,小數到兩位。adequacy=忠實度;fluency=流暢度;terminology=術語一致性。",
    buildGlossaryHint(glossary),
  ].filter(Boolean).join("\n");

  const user = [
    "=== 原文 ===",
    source,
    "=== 譯文 ===",
    target,
  ].join("\n");

  const res = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    temperature: 0.1,
    messages: [
      { role: "system", content: sys },
      { role: "user", content: user },
    ],
  });

  const raw = res.choices?.[0]?.message?.content?.trim() || "{}";
  // Models often wrap JSON in a Markdown code fence (``` or ```json); unwrap it when present.
  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/i);
  const payload = fenced ? fenced[1] : raw;

  try {
    return JSON.parse(payload);
  } catch (err) {
    // Same error type as a bare JSON.parse failure, but says which reply was bad.
    throw new SyntaxError(`品質檢查的回覆不是有效的 JSON:${err.message}`, { cause: err });
  }
}

  3. index.js(修改:加入翻譯入口)
    // index.js(節錄新增 translate 分支,其餘保留原狀)
    import { translateOne, translateBatch, qualityCheck } from "./src/day12_translator.js";

/**
 * Command-line options parsed from process.argv.
 * Every token starting with "--" becomes a key (without the dashes). Its value
 * is the following token when that token is non-empty and does not itself
 * start with "--"; otherwise the value is boolean `true`.
 */
const args = (() => {
  const tokens = process.argv.slice(2);
  const pairs = [];
  tokens.forEach((token, index) => {
    if (!token.startsWith("--")) return;
    const following = tokens[index + 1];
    const hasValue = Boolean(following) && !following.startsWith("--");
    pairs.push([token.replace(/^--/, ""), hasValue ? following : true]);
  });
  return Object.fromEntries(pairs);
})();

/**
 * CLI entry point. Dispatches on `args.task`; this excerpt only implements
 * the "translate" task, which has three modes selected by `--mode`:
 *   - one   : translate `--text` and print the translation
 *   - batch : translate the ";;"-separated `--texts` and print the results as JSON
 *   - qe    : score `--tgt` against `--src` and print the result as JSON
 * Shared options: `--to`, `--from`, `--tone`, `--keepfmt false`, and
 * `--glossary "A:B,C:D"`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // A flag passed without a value is parsed as boolean `true`. Treat anything
  // that is not a non-empty string as "not provided" so string handling below
  // (split / trim) never runs on a boolean.
  const stringOpt = (value, fallback) =>
    typeof value === "string" && value !== "" ? value : fallback;

  const task = args.task || "chat";

  if (task === "translate") {
    const mode = args.mode || "one"; // one | batch | qe
    const targetLang = stringOpt(args.to, "zh-TW");
    const sourceLang = stringOpt(args.from, ""); // empty = let the model detect it
    const tone = stringOpt(args.tone, "neutral");
    const preserveFormat = args.keepfmt !== "false";

    // Glossary syntax: "A:B,C:D". Pairs missing either side are dropped.
    const glossary = stringOpt(args.glossary, "")
      .split(",")
      .map((part) => part.trim())
      .filter(Boolean)
      .map((pair) => {
        const [source, target] = pair.split(":").map((s) => s.trim()).filter(Boolean);
        return source && target ? { source, target } : null;
      })
      .filter(Boolean);

    if (mode === "one") {
      const text = stringOpt(args.text, "RAG enables retrieval over private knowledge bases.");
      const out = await translateOne({
        text,
        sourceLang,
        targetLang,
        glossary,
        preserveFormat,
        tone,
      });
      console.log("\n=== 單筆翻譯 ===\n");
      console.log(out.target);
    } else if (mode === "batch") {
      // Batch input: items are separated by ";;".
      const raw = stringOpt(args.texts, "Hello;;Good morning;;This is a test.");
      const items = raw.split(";;").map((s) => s.trim()).filter(Boolean);
      const out = await translateBatch({
        items,
        sourceLang,
        targetLang,
        glossary,
        preserveFormat,
        tone,
      });
      console.log("\n=== 批次翻譯(JSON) ===\n");
      console.log(JSON.stringify(out, null, 2));
    } else if (mode === "qe") {
      const src = stringOpt(args.src, "RAG enables retrieval over private knowledge bases.");
      const tgt = stringOpt(args.tgt, "RAG 讓你可以在私有知識庫上進行檢索。");
      const out = await qualityCheck(src, tgt, glossary);
      console.log("\n=== 品質檢查(QE) ===\n");
      console.log(JSON.stringify(out, null, 2));
    } else {
      console.log("未知模式,請使用 --mode one | batch | qe");
    }
  } else {
    // ...your existing task branches (chat, image, vision, stt, tts, mm, docsum, etc.)
  }
}

// Run the CLI; on any failure print the error message and exit with status 1.
main().catch((err) => {
  const { message } = err;
  console.error("發生錯誤:", message);
  process.exit(1);
});

  4. package.json(新增 Scripts)
    {
    "scripts": {
    "day12:one": "node index.js --task translate --mode one --from en --to zh-TW --text \"RAG enables retrieval over private knowledge bases.\" --glossary \"Retrieval-Augmented Generation:檢索增強生成,embedding:嵌入向量\"",
    "day12:batch": "node index.js --task translate --mode batch --from en --to zh-TW --texts \"Hello;;This is a test;;Language models are useful.\"",
    "day12:qe": "node index.js --task translate --mode qe --src \"Please cancel my order.\" --tgt \"請幫我取消訂單。\""
    }
    }

▶️ CLI 使用範例

1) 單筆翻譯 + 術語表

npm run day12:one --silent

2) 批次翻譯(以 ;; 分隔多句,輸出 JSON)

npm run day12:batch --silent

3) 譯後品質檢查(回傳分數與建議,JSON)

npm run day12:qe --silent

4) 自動判斷來源語言,翻成英文,保留 Markdown 格式

node index.js --task translate --mode one --to en --text "請看這段程式碼:const x=1; 不要翻譯裡面的內容。"


上一篇
文件摘要器(PDF/TXT/MD/DOCX → 結構化摘要)
下一篇
AI 程式碼助理(解釋/重構/風險評審/單元測試草稿)
系列文
練習AI16
圖片
  熱門推薦
圖片
{{ item.channelVendor }} | {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言